home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Libris Britannia 4
/
science library(b).zip
/
science library(b)
/
DTP
/
DTP_TEX
/
H220.ZIP
/
WP2X110.ZIP
/
WP2X.C
< prev
next >
Wrap
C/C++ Source or Header
|
1991-08-18
|
53KB
|
1,516 lines
/* $Id: wp2x.c 1.10 91/08/18 15:05:41 raymond Exp $ */
/* Before compiling, read the section titled `portability concerns'. */
/************************************************************************
* $Log: wp2x.c $
* Revision 1.10 91/08/18 15:05:41 raymond
* Descriptor file stuff.
*
* Revision 1.9 91/08/06 09:08:09 raymond
* add missing `break' in check_arity
*
* Revision 1.8 91/08/06 08:31:21 raymond
* Avoid infinite loop if file is corrupted.
* Better error-checking on configuration file (new output scheme).
*
* Revision 1.7 91/08/02 13:35:37 raymond
* Epsilonically better handling of environments that didn't end properly.
* Change return type of main() to keep gcc quiet.
* MSC support.
*
* Revision 1.6 91/07/28 21:08:53 raymond
* BeginTabs et al, FNote#, ENote#, NegateTotal, more unsupported codes
* Improve character tokens, Header, Footer
* Take care when people don't end lines with HRt
* Fix major bugs in endnote processing, footnote numbering (and nobody
* noticed!)
* More worries about signed characters.
*
* Revision 1.5 91/07/23 22:59:43 raymond
* Add COMMENT token, and some bug fixes.
*
* Revision 1.4 91/07/23 22:09:23 raymond
* Concessions to slightly non-ANSI compilers. (`const', `unsigned char')
* More patches for machines with signed characters.
* Fix blatant bug in hex constants. (Amazed nobody noticed.)
* New tags SetFn#, Header, Footer.
* Warning messages for unsupported tokens.
* Backslashes processed in character tags.
* Fixed(?) footnotes, endnotes, page length changes.
* Inserted missing `break's into the huge switch.
*
* Revision 1.3 91/07/12 15:39:44 raymond
* Spiffy Turbo C support.
* Some <stdlib.h>'s don't declare errno et al.
* Command line switches `-s' and `-n' added.
* More cute warning messages.
* Dots periodically emitted.
* Give the enum of token types a name, to placate QuickC.
* Fix problems with pitch changes and signed characters.
*
* Revision 1.2 91/06/22 08:18:22 raymond
* <process.h> and fputchar() aren't sufficiently portable.
* strerror() fails to exist on some so-called ANSI platforms.
* Removed assumption that characters are unsigned.
* Forgot to #include <stdarg.h>
*
*/
/************************************************************************
* PORTABILITY CONCERNS
************************************************************************
*
* If possible, compile with unsigned characters. (Though I think
* I've taken care of all the places where I assumed characters are
* unsigned.)
*
* This program assumes that your compiler is fully ANSI-conformant.
* Depending on how non-conformant your compiler is, you may need to
* set the following symbols at compile time:
*
* NO_CONST -- set this if your compiler does not know what `const' means.
* Cdecl -- how to tag functions that are variadic.
*
* Cdecl is used if you need special declarations for variadic functions.
* This is used by IBM PC compilers so that you can make the default
* parameter passing Pascal-style or Fastcalls.
*
* Some very machine-dependent stuff happens when trying to open the
* descriptor file. Please read dopen.c as well.
*/
/* Compilers that do not know `const': define NO_CONST and the keyword is
 * erased wherever it appears below.
 */
#ifdef NO_CONST
#define const
#endif
/* Cdecl is attached to the variadic function error() and to the C-library
 * externals declared below; it expands to nothing unless the build
 * supplies its own definition.
 */
#ifndef Cdecl /* default is nothing */
#define Cdecl
#endif
/************************************************************************
* This program divides naturally into two parts.
*
* The first part reads in the descriptor file and builds the expansions
* for each of the identifiers listed above.
* This is the easy part.
*
* The second part reads the input file and uses the expansions collected
* in the first part to transform the file into the output.
* This is the hard part.
*
************************************************************************/
/* And now, the code.
* We start off with some obvious header files.
*/
#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
/* Some platforms do not define these externals in stdlib.h */
/* NOTE(review): ISO C provides errno through <errno.h> (where it may be a
 * macro), and sys_errlist/sys_nerr are not part of ISO C -- confirm that
 * these hand-written declarations still match the target C library.
 */
extern int Cdecl errno;
extern char *Cdecl sys_errlist[];
extern int Cdecl sys_nerr;
/************************************************************************/
/* Some common idioms */
/************************************************************************/
/* Expands to nothing.  Used as the visibly empty body of loops whose work
 * is done entirely in the controlling expression, and as the dummy else
 * branch of finish_string().
 */
#define do_nothing /* twiddle thumbs */
/************************************************************************/
/* Blowing up */
/************************************************************************/
/* The function "error" accepts two arguments. A FILE pointer and
* a printf-style argument list. The printf-style arguments are
* printed to stderr. If the FILE is non-NULL, the remaining
* contents of the file are printed as well (to provide context), up
* to 80 characters.
*/
/* error -- report a fatal error on stderr and terminate the program.
 *
 * fp   stream that was being parsed, or NULL.  When non-NULL, up to 80 of
 *      its still-unread characters are echoed to stderr as context.
 * fmt  printf-style format; the remaining arguments are its operands.
 *
 * Does not return: always finishes with exit(1).
 */
void Cdecl error(FILE *fp, char *fmt, ...)
{
    int i, c;
    va_list ap;

    fputs("Error: ", stderr);
    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);
    fputc('\n', stderr);
    if (fp) {
        fprintf(stderr, "Unread text: ");
        /* Test what getc() returns instead of asking feof() beforehand:
         * feof() only turns true after a read has already failed, so the
         * earlier form echoed EOF itself as a stray byte (and, on a read
         * error, eighty of them).
         */
        for (i = 0; i < 80 && (c = getc(fp)) != EOF; i++)
            fputc(c, stderr);
        fputc('\n', stderr);
    }
    exit(1);
}
/************************************************************************/
/* Command-line switches */
/************************************************************************/
int silent = 0; /* nonzero suppresses warnings, typeout text and progress blips */
int blipinterval = 1024; /* display blips every 1K */
int blipcount; /* counts down per input token; reloaded from blipinterval at zero */
/************************************************************************/
/* Basic file manipulations */
/************************************************************************/
/* We here define a few basic functions. Let us hope that the first
* three functions' names are self-descriptive.
*/
/* Consume characters from fp until one that is not whitespace (as judged
 * by isspace) is found, and return it.  Returns EOF if the stream runs
 * out first.
 */
int next_non_whitespace(FILE *fp)
{
    int ch;

    for (;;) {
        ch = getc(fp);
        if (ch == EOF || !isspace(ch))
            return ch;
    }
}
/* Like next_non_whitespace(), but only blanks and tabs are skipped; a
 * newline is returned to the caller.  Returns EOF at end of stream.
 */
int next_non_space_or_tab(FILE *fp)
{
    int ch;

    do {
        ch = getc(fp);
    } while (ch == ' ' || ch == '\t');
    return ch;
}
/* Discard the rest of the current line of fp, including its newline.
 * Stops quietly if the stream ends first.
 */
void eat_until_newline(FILE *fp)
{
    for (;;) {
        int ch = getc(fp);

        if (ch == '\n' || ch == EOF)
            break;
    }
}
/* The function parse_hex grabs a (no-more-than-two-character) hex
* constant. Similarly, parse_octal does the same for octal constants.
*/
/* Read a hex constant of one or two digits (either case) from fp and
 * return its value.  A missing first digit is fatal (reported through
 * error()); a character that cannot be the second digit is pushed back.
 * The arithmetic assumes 'A'..'F' are contiguous, as in ASCII.
 */
int parse_hex(FILE *fp)
{
    int ch, digit, result;

    ch = toupper(getc(fp));
    if (!isxdigit(ch))
        error(fp, "Expecting a hex digit");
    result = (ch > '9') ? ch - 'A' + 10 : ch - '0';

    ch = getc(fp);
    if (!isxdigit(ch)) {
        ungetc(ch, fp);         /* one-digit constant */
        return result;
    }
    ch = toupper(ch);
    digit = (ch > '9') ? ch - 'A' + 10 : ch - '0';
    return result * 16 + digit;
}
/* Finish reading an octal constant whose first digit, c, the caller has
 * already consumed.  Up to two further octal digits are taken from fp;
 * the first character that is not an octal digit is pushed back.
 * Returns the value of the one to three digits.
 */
int parse_octal(FILE *fp, register int c)
{
    int result = c - '0';
    int extra;                  /* digits taken beyond the first */

    for (extra = 0; extra < 2; extra++) {
        c = getc(fp);
        if (c < '0' || c > '7') {
            ungetc(c, fp);
            break;
        }
        result = result * 8 + (c - '0');
    }
    return result;
}
/************************************************************************/
/* Storing the input strings */
/************************************************************************/
/* The input strings are allocated from a large pool we set up at
* startup. This lets us do our thing without having to fight
* with people like malloc and friends. This method does limit
* our configuration file to 32K, however. We hope that this is
* not a problem. (It also means that the program can be translated
* to almost any other language without too much difficulty.)
*
* Here's how it works.
*
* "pool" is an array of POOL_SIZE characters. The value of POOL_SIZE
* is flexible, but shouldn't exceed 65535, since that's the size of
* an IBM PC segment. If your configuration file is more than 64K,
* then there's probably something wrong.
*
* "pool_ptr" points to the next character in "pool" that hasn't been
* used for anything yet.
*
* "top_of_pool" points one character beyond the end of pool, so we can
* see if we've run out of memory.
*
* When we want to put something into the pool, we simply store into "pool"
* and increment "pool_ptr" appropriately.
*
* Access to these variables is done through the following functions,
* implemented as macros.
*
* "anchor_string()" is called before you start throwing things into
* the pool. It returns a pointer to the beginning of the string
* being built up.
*
* "add_to_string(c)" adds the character "c" to the string being built up.
*
* "finish_string()" gets ready for building a new string. We check
* that we did not overflow our pool. We pull the sneaky trick of
* a dummy else clause so that [1] "else"s match up properly if this
* is nested inside an "if" statement, [2] the semicolon gets eaten
* up correctly.
*
* "remove_string(s)" removes all strings from the one called "s" onwards.
*
*/
/* Capacity of the string pool, in bytes. */
#define POOL_SIZE 32768U
char pool[POOL_SIZE];
char *pool_ptr = pool; /* next byte of pool not yet used */
#define top_of_pool (pool + POOL_SIZE)
#define anchor_string() pool_ptr
/* Append one character to the string being built.  The bound is tested
 * BEFORE the store, so a full pool is reported through error() instead of
 * being written past.  The expression still yields the stored character;
 * error() does not return, and the `, 0' only gives both arms of the
 * conditional an arithmetic type.
 */
#define add_to_string(c) \
(pool_ptr < top_of_pool ? (*pool_ptr++ = (c)) : (error(NULL, "string pool overflow."), 0))
/* Statement-like check kept for existing callers: a pool that has been
 * filled to its very last byte is still reported as an overflow.  The
 * dummy else keeps surrounding if/else pairs matched and swallows the
 * caller's semicolon.
 */
#define finish_string() \
if (pool_ptr >= top_of_pool) error(NULL, "string pool overflow."); \
else do_nothing
/* Discard the string s and everything allocated after it. */
#define remove_string(s) (pool_ptr = (s))
/************************************************************************/
/* Remembering the expansions */
/************************************************************************/
/* The array "expansion" contains the expansions for everything.
* Everything is initialized to NULL.
*
* We set up things as follows:
* expansion[0..255] contain the expansions for the possible characters.
* expansion[256...] contain the expansions for the special codes.
*
* Make sure this table is kept in parallel with the names[] array
*
*
*/
/* name value When is it expanded? */
/* ---- --- -------------------- */
/* Token codes for everything that is not a plain character.  Values start
 * at 256 so that a token code and a character code (0..255) can share one
 * index space in expansion[].  The i-th enumerator corresponds to entry
 * i of names[] (index = token - typeout), so the two lists must be edited
 * together.
 */
enum token_type { /* Some compilers do not like unnamed enums */
typeout = 256, /* Typed out as soon as it is encountered */
BEGIN , /* Before the first character of the file */
END , /* After the last character of the file */
Comment , /* For wp2x-generated comments */
eComment , /* End of a wp2x-generated comment */
PageNo , /* Current page number */
RomanPage , /* Set page number (to roman numerals) */
ArabicPage , /* Set page number (to arabic) */
HSpace , /* unbreakable space (`Hard space') */
Tab , /* Tab character */
BeginTabs , /* Begin tab settings */
/* DO NOT CHANGE THE RELATIVE ORDER OF THESE FOUR TOKENS */
/* (process_tab_attribute() selects among them as SetTab + 0..3) */
SetTab , /* Set normal tabstop at %d */
SetTabCenter , /* Set center tabstop at %d */
SetTabRight , /* Set right-justified tab at %d */
SetTabDecimal, /* Set decimal tab at %d */
EndTabs , /* End tab settings */
HPg , /* Hard page break */
CondEOP , /* Conditional end-of-page */
HRt , /* Hard return */
SRt , /* Soft return */
NHyph , /* Normal hyphen */
NHyphE , /* Normal hyphen at the end of a line */
HHyph , /* Hard (nonbreakable) hyphen */
DHyph , /* Discretionary hyphen */
DHyphE , /* Discretionary hyphen at the end of a line */
NoHyphWord , /* Do not hyphenate this word */
Marg , /* Margin settings */
TopMarg , /* Set top margin */
PageLength , /* Set page length */
SS , /* Single spacing */
DS , /* Double spacing */
OHS , /* 1.5 spacing (One and a Half Spacing) */
TS , /* Triple spacing */
LS , /* Generic line spacing */
LPI , /* set 6 or 8 LPI */
Bold , /* Begin boldface */
eBold , /* End boldface */
Und , /* Begin underline */
eUnd , /* End underline */
Red , /* Begin redline */
eRed , /* End redline */
Strike , /* Begin strikeout */
eStrike , /* End strikeout */
Rev , /* Begin reverse video */
eRev , /* End reverse video */
Over , /* Overstrike */
eOver , /* [mythical "end overstroke" code] */
Sup , /* Superscript */
eSup , /* [mythical "end superscript" code] */
Sub , /* Subscript */
eSub , /* [mythical "end subscript" code] */
UpHalfLine , /* Advance printer up 1/2 line */
DownHalfLine , /* Advance printer down 1/2 line */
AdvanceToHalfLine, /* Advance to absolute vertical position */
Indent , /* Indented paragraph */
DIndent , /* Left-and-right-indented paragraph */
eIndent , /* End indented paragraph */
MargRel , /* Margin release (unknown argument) */
Center , /* Center current line */
eCenter , /* End centering */
CenterHere , /* Center line around current column */
eCenterHere , /* End centering */
Align , /* Begin alignment */
eAlign , /* End alignment */
AlignChar , /* Set alignment character */
FlushRight , /* Begin flush right */
eFlushRight , /* End flush right */
Math , /* Begin math mode */
eMath , /* End math mode */
MathCalc , /* Begin math calc mode */
MathCalcColumn, /* Math calc column */
SubTtl , /* Do subtotal */
IsSubTtl , /* Subtotal entry */
Ttl , /* Do total */
IsTtl , /* Total entry */
GrandTtl , /* Do grand total */
NegateTotal , /* Negate current total */
Col , /* Begin column mode */
eCol , /* End column mode */
Fn , /* Begin footnote */
eFn , /* End footnote */
En , /* Begin endnote */
eEn , /* End endnote */
SetFnNum , /* Set footnote number */
FNoteNum , /* Footnote number */
ENoteNum , /* Endnote number */
TableMarker , /* Table of contents or whatever marker */
Hyph , /* Hyphenation on */
eHyph , /* off */
Just , /* Justification on */
eJust , /* off */
Wid , /* Widow/orphan protection on */
eWid , /* off */
HZone , /* Hyphenation zone */
DAlign , /* Decimal alignment character */
Header , /* Begin header text */
eHeader , /* End header text */
Footer , /* Begin footer text */
eFooter , /* End footer text */
Supp , /* Suppress formatting for one page */
CtrPg , /* Center page vertically */
SetFont , /* Change pitch or font */
SetBin , /* Select paper bin (0, 1, ...) */
PN , /* Page number position (PN+0 through PN+8) */
/* Internal tokens for unsupported operations */
/* PN+1 .. PN+8 have no enumerators of their own; the placeholder below
 * resumes the numbering after them and lines up with the NULL entry
 * of names[].
 */
UnsupportedPlaceHolder = PN + 9,
SetPageNumberColumn,
SetTabs,
SetUnderlineMode,
DefineColumn,
SetFootnoteAttributes,
SetParagraphNumberingStyle,
NumberedParagraph,
BeginMarkedText,
EndMarkedText,
DefineMarkedText,
DefineIndexMark,
DefineMathColumns,
Obsolete,
ReservedCode,
UnknownCode,
LastToken /* one past the last real token; sizes expansion[] */
};
/* Expansion text for each character (indices 0..255) and each token
 * (indices typeout..LastToken-1).  NULL means no expansion is known yet.
 */
char *expansion[LastToken];
/************************************************************************/
/* Naming the identifiers */
/************************************************************************/
/* Extreme care must be taken to ensure that this list parallels the list
* of token names above.
*/
/* One entry per token of enum token_type, in the same order. */
typedef struct identifier {
char *name; /* spelling used in the descriptor file and in messages */
int arity; /* how many parameters an expansion may use: %1 and %c need
            * at least 1, %2 needs 2 (enforced by check_arity) */
} Identifier;
/* names[token - typeout] describes `token'.  match_identifier() stops its
 * search at the NULL entry, so the names after it cannot be matched from
 * a descriptor file; they are only looked up by token value (for example
 * by unsupported()).
 */
Identifier names[] = {
{ "typeout", 0 },
{ "BEGIN", 0 },
{ "END", 0 },
{ "Comment", 0 },
{ "comment", 0 },
{ "PageNo", 0 },
{ "RomanPage", 1 },
{ "ArabicPage", 1 },
{ "HSpace", 0 },
{ "Tab", 0 },
{ "BeginTabs", 0 },
{ "SetTab", 1 },
{ "SetTabCenter", 1 },
{ "SetTabRight", 1 },
{ "SetTabDecimal", 1 },
{ "EndTabs", 0 },
{ "HPg", 0 },
{ "CondEOP", 1 },
{ "HRt", 0 },
{ "SRt", 0 },
{ "-", 0 }, /* NHyph */
{ "--", 0 }, /* NHyphE */
{ "=", 0 }, /* HHyph */
{ "\\-", 0 }, /* DHyph */
{ "\\--", 0 }, /* DHyphE */
{ "NoHyphWord", 0 },
{ "Marg", 2 },
{ "TopMarg", 1 },
{ "PageLength", 1 },
{ "SS", 0 },
{ "DS", 0 },
{ "1.5S", 0 }, /* OHS */
{ "TS", 0 },
{ "LS", 1 },
{ "LPI", 1 },
{ "Bold", 0 },
{ "bold", 0 },
{ "Und", 0 },
{ "und", 0 },
{ "Red", 0 },
{ "red", 0 },
{ "Strike", 0 },
{ "strike", 0 },
{ "Rev", 0 },
{ "rev", 0 },
{ "Over", 0 },
{ "over", 0 },
{ "Sup", 0 },
{ "sup", 0 },
{ "Sub", 0 },
{ "sub", 0 },
{ "UpHalfLine", 0 },
{ "DownHalfLine", 0 },
{ "AdvanceToHalfLine", 2 },
{ "Indent", 0 },
{ "DIndent", 0 },
{ "indent", 0 },
{ "MarginRelease", 1 },
{ "Center", 0 },
{ "center", 0 },
{ "CenterHere", 0 },
{ "centerhere", 0 },
{ "Align", 0 },
{ "align", 0 },
{ "AlignChar", 1 },
{ "FlushRight", 0 },
{ "flushright", 0 },
{ "Math", 0 },
{ "math", 0 },
{ "MathCalc", 0 },
{ "MathCalcColumn", 0 },
{ "SubTotal", 0 },
{ "IsSubTotal", 0 },
{ "Total", 0 },
{ "IsTotal", 0 },
{ "GrandTotal", 0 },
{ "NegateTotal", 0 },
{ "Col", 0 },
{ "col", 0 },
{ "Fn", 0 },
{ "fn", 0 },
{ "En", 0 },
{ "en", 0 },
{ "SetFn#", 1 },
{ "FNote#", 0 },
{ "ENote#", 0 },
{ "TableMarker", 0 },
{ "Hyph", 0 },
{ "hyph", 0 },
{ "Just", 0 },
{ "just", 0 },
{ "Wid", 0 },
{ "wid", 0 },
{ "HZone", 2 },
{ "DAlign", 1 },
{ "Header", 0 },
{ "header", 0 },
{ "Footer", 0 },
{ "footer", 0 },
{ "Supp", 1 },
{ "CtrPg", 0 },
{ "SetFont", 2 },
{ "SetBin", 1 },
{ "PN0", 0 },
{ "PN1", 0 },
{ "PN2", 0 },
{ "PN3", 0 },
{ "PN4", 0 },
{ "PN5", 0 },
{ "PN6", 0 },
{ "PN7", 0 },
{ "PN8", 0 },
{ NULL, 0 }, /* UnsupportedPlaceHolder -- keeps match_identifier happy */
{ "set page number column", 0 },
{ "extended tabs", 0 },
{ "underline mode", 0 },
{ "define column", 0 },
{ "footnote attributes", 0 },
{ "paragraph numbering style", 0 },
{ "numbered paragraph", 0 },
{ "begin marked text", 0 },
{ "end marked text", 0 },
{ "define marked text", 0 },
{ "define index mark", 0 },
{ "define math columns", 0 },
{ "WPCorp obsolete", 0 },
{ "WPCorp reserved", 0 },
{ "WPCorp undefined", 0 },
};
/* The file pointer "descriptor" points to our descriptor file
* and "input" points to our input file.
*
* Kinda makes sense that way.
*/
FILE *descriptor, *input; /* read through dgetc() and igetc() respectively */
/* And the function match_identifier(s) takes a string and converts
* it to its corresponding integer. Or blows up if it couldn't
* find one.
*/
/* Translate the identifier text s into its code.
 *
 * A character token -- quote, any one byte, quote -- yields that byte's
 * value (0..255).  The middle byte is not inspected, so it may be any
 * value at all.  Anything else is looked up in names[] and yields
 * typeout + its index.  An unknown identifier is fatal.
 */
int match_identifier(const char *s)
{
    int idx;

    if (s[0] == '\'' && s[2] == '\'' && s[3] == '\0')
        return (int) (unsigned char) s[1];

    /* The NULL name marks the end of the matchable part of the table. */
    for (idx = 0; names[idx].name != NULL; idx++) {
        if (strcmp(names[idx].name, s) == 0)
            return typeout + idx;
    }

    error(descriptor, "Unknown identifier %s", s);
    /*NOTREACHED*/
    return 0;
}
/* check_arity ensures that the expansion string is valid */
/* check_arity -- validate the percent-escapes of an expansion string.
 *
 * ident  code the expansion belongs to: a character (0..255) or a token
 *        (typeout and up).  Characters take no parameters; a token takes
 *        names[ident-typeout].arity of them.
 * t      the expansion text.
 *
 * Accepted escapes: %\n (only as the very first thing in t), %1 and %c
 * (arity >= 1), %2 (arity >= 2) and %%.  Anything else is fatal.
 *
 * Character codes have no entry in names[], so diagnostics for them show
 * the code in hex rather than indexing names[] with a negative subscript.
 */
void check_arity(int ident, char *t)
{
    char *s;
    int is_token = (ident >= typeout);
    int arity = is_token ? names[ident - typeout].arity : 0;

    for (s = t; *s; s++) {
        if (*s != '%') continue;
        switch (*++s) {
        case '\n':
            if (s != t + 1) {
                if (is_token)
                    error(descriptor, "%s: `%%\\n' not at start of expansion",
                          names[ident - typeout].name);
                else
                    error(descriptor, "char 0x%02X: `%%\\n' not at start of expansion",
                          ident);
            }
            break;
        case '1':
        case 'c':
            if (arity < 1) goto bad_escape;
            break;
        case '2':
            if (arity < 2) goto bad_escape;
            break;
        case '%':
            break;
        default:
        bad_escape:
            if (is_token)
                error(descriptor, "%s: invalid escape `%%%c'",
                      names[ident - typeout].name, *s);
            else
                error(descriptor, "char 0x%02X: invalid escape `%%%c'",
                      ident, *s);
        }
    }
}
/************************************************************************/
/* Reading input from the descriptor file */
/************************************************************************/
/* The macro igetc() gets a character from the input file.
* the macro dgetc() gets a character from the descriptor file.
*/
#define igetc() getc(input) /* next character of the input file, or EOF */
#define dgetc() getc(descriptor) /* next character of the descriptor file, or EOF */
/* expand_backslash() is called when a backslash is encountered in
* the descriptor file. Its job is to parse a backslash-sequence.
* The usual C-escapes (\a \b \f \n \r \t \v) are understood, as
* well as the octal escape \000 [up to three octal digits] and
* the hex escape \xFF [up to two hex digits].
*/
/* Decode the escape that follows a backslash already read from the
 * descriptor file and return the character it denotes.
 *
 *   \a \b \f \n \r \t \v   the usual C control characters
 *   \x.. or \X..           hex constant, via parse_hex()
 *   \0 .. \7               octal constant, via parse_octal()
 *   anything else          stands for itself (EOF comes back as EOF)
 */
int expand_backslash(void)
{
    int c = dgetc();

    switch (c) {
    case 'a': return '\a';
    case 'b': return '\b';
    case 'f': return '\f';
    case 'n': return '\n';
    case 'r': return '\r';
    case 't': return '\t';
    case 'v': return '\v';
    case 'x':
    case 'X': return parse_hex(descriptor);
    default:  break;
    }
    if (c >= '0' && c <= '7')
        return parse_octal(descriptor, c);
    return c;
}
/* The function read_identifier() attempts to match an identifier
* in the descriptor file. It returns EOF if the end of the descriptor
* file was reached, or the code of the identifier we found.
* (or blows up if an error was detected.)
* We build the identifier in "s", with the help of our
* pool-managing functions above, then discard it, immediately,
* since we don't use it any more.
*/
/* Read the next identifier from the descriptor file, up to and including
 * the `=' that must follow it, and return its code.  Returns EOF when the
 * file is exhausted; malformed input is fatal.
 *
 * The spelling is assembled in the string pool only long enough to look
 * it up, and is released again before returning.
 */
int read_identifier(void)
{
    int c;              /* character most recently read */
    char *start;        /* where the spelling begins in the pool */
    int ident;          /* code found for the spelling */

    /* Skip blank space and whole `#' comment lines. */
    for (;;) {
        c = next_non_whitespace(descriptor);
        if (c != '#') break;
        eat_until_newline(descriptor);
    }
    if (c == EOF) return EOF;

    start = anchor_string();
    if (c == '\'') {
        /* Character token: quote, one (possibly escaped) char, quote. */
        add_to_string(c);
        c = dgetc();
        if (c == '\\') c = expand_backslash();
        add_to_string(c);
        c = dgetc();
        if (c != '\'')
            error(descriptor, "Invalid character identifier");
        add_to_string(c);
        c = next_non_space_or_tab(descriptor);
    } else {
        /* Name token: gather characters, dropping blanks and tabs,
         * until `=', end of line or end of file.
         */
        for (;;) {
            add_to_string(c);
            c = next_non_space_or_tab(descriptor);
            if (c == '\\') c = expand_backslash();
            if (c == EOF || c == '=' || c == '\n') break;
        }
    }
    if (c != '=') error(descriptor, "Identifier not followed by = sign");

    add_to_string('\0');        /* make it a standard C string */
    finish_string();
    ident = match_identifier(start);
    remove_string(start);       /* the spelling is no longer needed */
    return ident;
}
/* The function grab_expansion() reads expansion text from the
* descriptor file and adds it to the pool, returning a pointer
* to the string it just created.
*
* After anchoring a new string, we look for the opening quotation
* mark, then start gobbling characters. Everything gets copied
* straight into the string.
*
*/
/* Read one double-quoted expansion from the descriptor file into the
 * string pool and return a pointer to it.  Backslash escapes are decoded
 * on the way in; the text ends at the closing quote or at end of file.
 * A missing opening quote is fatal.
 */
char *grab_expansion(void)
{
    char *start = anchor_string();
    int c;

    if (next_non_whitespace(descriptor) != '\"')
        error(descriptor, "Quotation mark expected");

    for (;;) {
        c = dgetc();
        if (c == EOF || c == '\"') break;
        if (c == '\\') c = expand_backslash();
        add_to_string(c);
    }
    add_to_string('\0');
    finish_string();
    return start;
}
/* Ah, now with all of these beautiful functions waiting for us,
* we can now write our first Useful Function: do_descriptor_file.
* It reads the descriptor file and loads up the "expansion" array
* with the text expansions we are reading from the file.
*
* If we grabbed the expansion of a "typeout", we type it out
* and discard the string.
*
* We stop when the descriptor file runs dry.
*
*/
/* Read the whole descriptor file, storing each expansion in expansion[]
 * under the code of the identifier that introduced it.
 *
 * A "typeout" entry is a message for the user, not an expansion: it is
 * written to stderr (unless silent) and then always discarded.  It is
 * never arity-checked, so a `%' in the message is harmless whether or not
 * output is silenced.  Every other entry is validated by check_arity().
 */
void do_descriptor_file(void)
{
    int ident;

    while ((ident = read_identifier()) != EOF) {
        expansion[ident] = grab_expansion();
        if (ident == typeout) {
            if (!silent) fputs(expansion[typeout], stderr);
            /* Give the pool space back and forget the text even when
             * nothing was printed.
             */
            remove_string(expansion[typeout]);
            expansion[typeout] = NULL;
        } else {
            check_arity(ident, expansion[ident]);
        }
    }
}
/************************************************************************/
/* Reading from the input file */
/************************************************************************/
/* The function verify(c) checks that the next character in the input
* stream is indeed "c". It eats the character, if all is well.
* If something went wrong, we complain to stderr, but keep going.
*/
/* Consume one character of the input file and compare it with c.  A
 * mismatch only draws a warning on stderr; processing carries on.
 */
void verify(int c)
{
    int got = igetc();

    if (got == c) return;
    fprintf(stderr, "Warning: Expected %02X but received %02X.\n", c, got);
}
/* The function gobble(n) simply eats "n" characters from the input
* file.
*/
/* Discard n characters of the input file.  A count of zero or less
 * discards nothing, so a bogus negative length cannot send the loop
 * counting down through the whole range of int.
 */
void gobble(int n)
{
    for (; n > 0; n--)
        (void) igetc();
}
/* Cleared by process() on every expansion, set again by the hard-return
 * handling in process_token(), and consulted by leave_environment() so
 * that a forced HRt is not emitted twice in a row.
 */
int last_HRt = 0; /* most recent output was HRt */
/* Processing a special code simply entails dumping its expansion.
* If the expansion is NULL, then we either
* [1] print nothing, if it is a code,
* [2] print the character itself, if it is an ASCII character.
*
* In dumping its expansion, we expand the following percent-escapes:
*
* The percent-escapes are:
* %\n -- newline if previous character was not a newline
* (meaningful only as first character in sequence)
* %1 -- first parameter, in decimal form
* %2 -- second parameter, in decimal form
* %c -- first parameter, in character form
* %% -- literal percent sign
*
* all other %-escapes are flagged as warnings (but should never occur,
* since they are trapped at the time the descriptor file is read.)
*/
/* process -- write the expansion of code c to stdout.
 *
 * c    character (0..255) or token code.
 * d1   first parameter, substituted for %1 (decimal) and %c (character).
 * d2   second parameter, substituted for %2 (decimal).
 *
 * A code with no expansion yet gets a default the first time it is seen:
 * printable ASCII is simply echoed; any other character becomes its own
 * one-byte expansion; a token gets an empty expansion.  The last two
 * cases warn on stderr unless silent.
 */
void process(int c, int d1, int d2)
{
    static int last_newline = 0;    /* previous expansion ended in '\n' */
    char *p;
    char esc;

    last_HRt = 0; /* the killer switch sets this */

    if (expansion[c] == NULL) { /* invent a default action */
        if (c >= ' ' && c < 128) {
            /* regular characters emit themselves */
            putchar(c);
            last_newline = 0;
            return;
        }
        if (c < 256) {
            /* any other single character also emits itself */
            expansion[c] = anchor_string();
            add_to_string(c);
            add_to_string('\0');
            finish_string();
            if (!silent)
                fprintf(stderr, "Warning: No expansion for %02X (%c)\n", c, c);
        } else {
            /* a token with no expansion expands to nothing */
            expansion[c] = "";
            if (!silent)
                fprintf(stderr, "Warning: No expansion for %s\n", names[c-typeout].name);
        }
    }

    p = expansion[c];
    if (*p == '\0') return;     /* nothing to emit; last_newline untouched */

    while (*p != '\0') {
        if (*p != '%') {
            putchar(*p++);
            continue;
        }
        esc = p[1];             /* the character after the percent sign */
        p += 2;
        switch (esc) {
        case '\n':
            if (!last_newline) putchar('\n');
            break;
        case '1':
            printf("%d", d1);
            break;
        case '2':
            printf("%d", d2);
            break;
        case 'c':
            putchar(d1);
            break;
        case '%':
            putchar('%');
            break;
        default:
            fprintf(stderr, "Internal error: Invalid escape, %%%c\n", esc);
            break;
        }
    }
    last_newline = (p[-1] == '\n');
}
/* Shorthands for process(), named after the number of parameters the
 * caller supplies; parameters not supplied are passed as 0.
 */
#define process0(c) process(c,0,0)
#define process1(c,a) process(c,a,0)
#define process2(c,a,b) process(c,a,b)
/* Handle a code that wp2x cannot translate: its name is written to
 * stdout between the Comment and eComment expansions.  The first time
 * each such code is met (and unless silent) a warning also goes to
 * stderr; giving the code an empty expansion is what marks it as seen.
 */
void unsupported(int c)
{
    char *label = names[c-typeout].name;

    if (!expansion[c] && !silent) {
        expansion[c] = "";
        fprintf(stderr, "Warning: `%s' code not supported\n", label);
    }
    process0(Comment);
    fputs(label, stdout);
    process0(eComment);
}
/* The function gobble_until(c) eats characters from the input file
* until it reaches a c or reaches EOF.
*/
/* Discard input up to and including the first byte equal to c, or up to
 * end of file if no such byte turns up.
 */
void gobble_until(int c)
{
    int got;

    do {
        got = igetc();
    } while (got != EOF && (int) (unsigned char) got != c);
}
/* line_spacing(l) is called whenever we hit a line-spacing-change command.
* The argument is the desired line spacing, multiplied by two.
* So single spacing gets a 2, 1.5 spacing gets a 3, etc.
*/
/* Emit the token for a line-spacing change.  l is twice the spacing:
 * 2, 3, 4 and 6 have dedicated tokens (single, one-and-a-half, double,
 * triple); any other value goes out through the generic LS token with l
 * as its parameter.
 */
void line_spacing(int l)
{
    if (l == 2)
        process0(SS);
    else if (l == 3)
        process0(OHS);
    else if (l == 4)
        process0(DS);
    else if (l == 6)
        process0(TS);
    else
        process1(LS, l);
}
/* Token that must be expanded to close the environment currently open,
 * or 0 when none is open.
 */
int environment_status = 0; /* cleanup at HRt */
/* Close the open environment, if any.  With force_HRt nonzero, also emit
 * a hard return unless one was the most recent output.
 */
void leave_environment(int force_HRt)
{
    int pending = environment_status;

    if (pending != 0) {
        process0(pending);
        environment_status = 0;
    }
    if (!force_HRt) return;
    if (!last_HRt) process0(HRt);
}
/* The "note_status" flag has one of three values:
* 0 if we are not inside a note
* eFn if we are inside a footnote
* eEn if we are inside an endnote
* (that is, the token that will close the note once it ends).
*
* The function handle_note() is called to deal with footnotes and
* endnotes. It adjusts the note_status accordingly.
*/
int note_status = 0; /* 0, or the token (eFn / eEn) that closes the open note */
/* Called at a note code.  If a note is open, close it: leave any open
 * environment (forcing a hard return), expand the closing token and
 * clear note_status.  Otherwise open one: bit 1 of the next input byte
 * selects endnote over footnote, and the rest of the note prefix is
 * skipped.
 */
void handle_note(void)
{
    if (note_status != 0) {
        leave_environment(1);
        process0(note_status);
        note_status = 0;
        return;
    }

    /* Decide whether it is an endnote or a footnote */
    if (igetc() & 2) {
        process0(En);
        note_status = eEn;
        gobble(5);
    } else {
        process0(Fn);
        note_status = eFn;
        gobble(7);
    }
    verify(0xFF);
    gobble(2); /* margins */
}
/* The tab_table is a bit field. Each set bit represents a tabstop.
* Note, however, that the bits are counted from MSB to LSB.
*
* The tab_attribute_table is a nybble field. The n'th nybble represents
* the attributes of the n'th tabstop.
*/
unsigned char tab_table[32];            /* one bit per column, MSB first */
unsigned char tab_attribute_table[20];  /* one nybble per tabstop */
int next_attribute;                     /* index of the next nybble to use */
/* Emit the tab-setting token for a tabstop at column i, taking its kind
 * from the next unused nybble of tab_attribute_table (high nybble of each
 * byte first).
 *
 * The bottom two bits of the nybble give the kind of tab (an offset from
 * SetTab); bit 2 requests dot filling, which is not supported, and bit 3
 * is unused -- hence the `& 3'.
 *
 * tab_table can flag more tabstops (256) than the attribute table has
 * nybbles (40).  Tabstops beyond the table are treated as ordinary tabs
 * rather than reading past the end of the array.
 */
void process_tab_attribute(int i)
{
    int b = 0;                          /* default: ordinary tab */
    int slot = next_attribute / 2;

    if (next_attribute >= 0 && slot < (int) sizeof(tab_attribute_table)) {
        if (next_attribute & 1) b = tab_attribute_table[slot] & 3;
        else b = (tab_attribute_table[slot] / 16) & 3;
    }
    next_attribute++;
    process1(SetTab + b, i);
}
/* Emit the complete tab settings: BeginTabs, one tab-setting token per
 * bit set in tab_table (column = byte index * 8 + bit position, counting
 * bits from the most significant), then EndTabs.  Attributes are
 * consumed from the start of tab_attribute_table.
 */
void process_tab_table(void)
{
    int byte_no, bit_no;

    next_attribute = 0;
    process0(BeginTabs);
    for (byte_no = 0; byte_no < 32; byte_no++) {
        for (bit_no = 0; bit_no < 8; bit_no++) {
            if (tab_table[byte_no] & (0x80 >> bit_no))
                process_tab_attribute(byte_no * 8 + bit_no);
        }
    }
    process0(EndTabs);
}
/* Old-style tab code: only the first 20 bytes of the tab bitmap come
 * from the input file.  Both tables are zeroed first, so the columns
 * beyond the bitmap hold no tabs and every tab has attribute 0.
 */
void handle_tabs(void)
{
    memset(tab_attribute_table, 0, sizeof tab_attribute_table);
    memset(tab_table, 0, sizeof tab_table);
    fread(tab_table, 20, 1, input); /* old-style tabs */
    process_tab_table();
}
void handle_extended_tabs(void) {
fread(tab_table, 32, 1, input);
fread(tab_attribute_table, 20, 1, input);
process_tab_table();
}
/* The FF_status flag tells us what we should do when we encounter an 0xFF.
* It contains the token code of the active code, or 0 if no code is active.
*/
int FF_status = 0; /* token that closes the active code, or 0 if none */
/* Called at an 0xFF byte.  With no code active the byte is processed as
 * an ordinary character.  Otherwise it ends the active code: leave any
 * open environment (forcing a hard return), expand the closing token,
 * skip two bytes, check for the 0xD1 that should follow, and clear
 * FF_status.
 */
void handle_FF(void)
{
    if (FF_status == 0) {
        process0(0xFF);
        return;
    }
    /* finish header/footer */
    leave_environment(1);
    process0(FF_status);
    gobble(2);
    verify(0xD1);
    FF_status = 0;
}
/* The function process_token does all of the real work.
* Given the first character of a token, we eat up everything
* that belongs to that token. This routine might be called
* recursively, since some tokens are defined in terms of other
* tokens. (For example, the subscript code is expanded as
* [Sub] <character being subscripted> [sub]
* and the <character being subscripted> might involve other token
* expansions; specifically, it might be an IBM Extended character.)
*
* Luckily, most of our tokens are not recursive. The macro
* bracket(before, after)
* does the recursive stuff for us, bracketing the next token
* between expansions of "before" and "after".
*
*/
/* NOTE: this expands to three complete statements, the last of which
 * already carries its own semicolon.  It is therefore only safe where a
 * sequence of statements is allowed; used as the unbraced body of an
 * if/else or loop, only the first statement would be controlled.
 */
#define bracket(before,after) process0(before); process_token(); \
process0(after);
int process_token(void)
{
int c = igetc();
if (c == EOF) return 0;
c = (int) (unsigned char) c;
if (!--blipcount && !silent) {
blipcount = blipinterval;
putc('.', stderr);
}
switch (c) { /* Codes listed in numerical rather than logical order */
case 0x02: process0(PageNo); break; /* Page number */
case 0x09: process0(Tab); break; /* Tab character */
case 0x8C: /* Soft page break after a HRt */
case 0x0A: /* Hard Return */
last_HRt = 0; leave_environment(1); last_HRt = 1; break;
case 0x0B: /* Soft page break after a SRt */
case 0x0D: process0(SRt); break; /* Soft Return */
case 0x0C: process0(HPg); break; /* Hard Page */
case '-' : process0(HHyph); break; /* Nonbreaking hyphen */
case 0x80: break; /* NOP */
case 0x81: process0(Just); break; /* Right justification */
case 0x82: process0(eJust); break; /* Ragged right */
case 0x83: /* End centering */
case 0x84: leave_environment(0); break; /* End aligned text */
case 0x85: process0(MathCalc); break; /* Begin math calc */
case 0x86: process0(CtrPg); break; /* Center page vertically */
case 0x87: process0(Col); break; /* Begin column mode */
case 0x88: process0(eCol); break; /* End column mode */
case 0x89: process0(Tab); break; /* Tab after right margin */
case 0x8A: process0(Wid); break; /* Widow/orphan protection */
case 0x8B: process0(eWid); break; /* Allow widows/orphans */
/* case 0x8C: see 0x0A */
case 0x8D: /* Footnote/Endnote number */
process0(note_status == eFn ? FNoteNum : ENoteNum); break;
case 0x8E:
case 0x8F: unsupported(ReservedCode); break; /* Reserved codes */
case 0x90: process0(Red); break; /* Begin redline */
case 0x91: process0(eRed); break; /* End redline */
case 0x92: process0(Strike); break; /* Begin strikeout */
case 0x93: process0(eStrike); break; /* End strikeout */
case 0x94: process0(Und); break; /* Begin underlining */
case 0x95: process0(eUnd); break; /* End underlining */
case 0x96: process0(Rev); break; /* Begin reverse video */
case 0x97: process0(eRev); break; /* End reverse video */
case 0x98: process0(TableMarker); break;/* Table of something marker */
case 0x99: bracket(Over, eOver); break; /* Overstrike */
case 0x9A: process0(NoHyphWord); break;/* Do not hyphenate this word */
case 0x9B: break; /* End of generated text */
case 0x9C: process0(eBold); break; /* End boldface */
case 0x9D: process0(Bold); break; /* Begin boldface */
case 0x9E: process0(eHyph); break; /* Forbid hyphenation */
case 0x9F: process0(Hyph); break; /* Allow hyphenation */
case 0xA0: process0(HSpace); break; /* Hard space */
case 0xA1: process0(SubTtl); break; /* Do subtotal */
case 0xA2: process0(IsSubTtl); break; /* Subtotal entry */
case 0xA3: process0(Ttl); break; /* Do total */
case 0xA4: process0(IsTtl); break; /* Total entry */
case 0xA5: process0(GrandTtl); break; /* Do grand total */
case 0xA6: process0(MathCalcColumn); break; /* Math calc column */
case 0xA7: process0(Math); break; /* Begin math mode */
case 0xA8: process0(eMath); break; /* End math mode */
case 0xA9: process0(NHyph); break; /* Normal breakable hyphen */
case 0xAA: /* Hyphen at end of line */
case 0xAB: process0(NHyphE); break; /* Hyphen at end of page */
case 0xAC: process0(DHyph); break; /* Discretionary hyphen */
case 0xAD: /* Discretionary hyphen at EOLn */
case 0xAE: process0(DHyphE); break; /* Discretionary hyphen at EOPg */
case 0xAF: /* EOT columns and EOLn */
case 0xB0: break; /* EOT columns and EOPg */
case 0xB1: process0(NegateTotal); break; /* Negate current total */
case 0xBC: bracket(Sup, eSup); break; /* Superscript */
case 0xBD: bracket(Sub, eSub); break; /* Subscript */
case 0xBE: process0(UpHalfLine); break; /* Advance 1/2 line up */
case 0xBF: process0(DownHalfLine); break; /* Advance 1/2 line down */
case 0xC0: gobble(2); c = igetc(); /* Margin change */
process2(Marg, c, igetc()); verify(0xC0); break;
case 0xC1: gobble(1); line_spacing(igetc()); verify(0xC1); break;
/* Line spacing change */
case 0xC2: process1(MargRel, igetc()); /* Margin release */
verify(0xC2); break;
case 0xC3: /* Center text */
leave_environment(0);
switch (igetc()) {
case 0: process0(Center); /* Center between margins */
environment_status = eCenter; break;
case 1: /* Center around current column */
process0(CenterHere);
environment_status = eCenterHere; break;
}
gobble(2); verify(0xC3); break;
case 0xC4: /* Align or Flush Right */
leave_environment(0);
c = igetc();
/* if high bit on c is set, then dot fill. (Ignore) */
switch (c & 0x7f) {
case 0x0C:
case 0x0A: process1(FlushRight, igetc());/* alignment col */
environment_status = eFlushRight;
break;
default: process2(Align, c, igetc());/* alignment column */
environment_status = eAlign;
break;
}
gobble(1); /* trash */
verify(0xC4);
break;
case 0xC5: gobble(2); c = igetc(); /* Hyphenation zone */
process2(HZone, c, igetc()); verify(0xC5); break;
case 0xC6: gobble(1); /* Page number position */
process0(PN + igetc()); verify(0xC6); break;
case 0xC7: gobble(2); c = igetc(); /* New page number */
c = (c<<8) + (unsigned char)igetc();
process1( (c&0x8000) ? RomanPage : ArabicPage, c&0x7fff);
verify(0xC7); break;
case 0xC8: gobble(3); /* Set Page number column */
/* next 3 bytes are <left> <center> <right> */
gobble(3);
unsupported(SetPageNumberColumn);
verify(0xC8); break;
case 0xC9: gobble(20); /* Set tabs */
handle_tabs();
verify(0xC9); break;
case 0xCA: process1(CondEOP, igetc()); /* Conditional end of page */
verify(0xCA); break;
case 0xCB: /* Set pitch or font */
gobble(2); /* old pitch and font */
c = igetc();
process2(SetFont, c, igetc()); /* pitch and font number */
/* negative pitch means proportional font */
verify(0xCB); break;
case 0xCC: /* Indented paragraph */
leave_environment(0);
gobble(1); process1(Indent, igetc()); verify(0xCC);
environment_status = eIndent; break;
/* (really: Temporary margin) */
case 0xCD: /* Indented paragraph (obsolete) */
leave_environment(0);
process1(Indent, igetc()); verify(0xCD);
environment_status = eIndent; break;
/* (really: Temporary margin) */
case 0xCE: gobble(1); process1(TopMarg, igetc()); /* Set top margin */
verify(0xCE); break;
case 0xCF: /* Suppress headers/footers for this page */
process1(Supp, (unsigned char)igetc());
verify(0xCF); break;
case 0xD0: gobble(2); /* old form length */ /* Set page length */
process1(PageLength, igetc()); /* lines per page */
gobble(1); /* new page length */
verify(0xD0); break;
case 0xD1: /* header/footer */
c = igetc(); /* def byte */
gobble(1); /* old half-lines */
if (c&2) { process0(Footer); FF_status = eFooter; }
else { process0(Header); FF_status = eHeader; }
verify(0xFF); verify(0xFF); /* separator */
gobble(2); /* left and right margin */
break; /* continue processing */
case 0xD2: gobble(5); /* obsolete footnote */
unsupported(Obsolete);
gobble_until(0xD2);
break;
case 0xD3: gobble(2); /* obsolete `set footnote number' */
unsupported(Obsolete);
verify(0xD3);
break;
case 0xD4: /* Advance to half line number */
c = igetc(); /* current line number */
process2(AdvanceToHalfLine, c, igetc());/* desired line # */
verify(0xD4); break;
case 0xD5: gobble(1); process1(LPI, igetc()); /* Set LPI (6 or 8) */
verify(0xD5); break;
case 0xD6: /* set extended tabs */
/* next 4 bytes are <old start><old increment>
<new start><new increment> */
gobble(4);
unsupported(SetTabs);
verify(0xD6); break;
case 0xD7: gobble(63); /* Define math columns */
unsupported(DefineMathColumns);
verify(0xD7); break;
case 0xD8: gobble(1); process1(AlignChar, igetc());
verify(0xD8); break; /* Set alignment character */
case 0xD9: gobble(2); /* obsolete margin release */
unsupported(Obsolete);
verify(0xD9);
break;
case 0xDA: gobble(1+1); /* Set underline mode */
/* second byte is a bit field.
* 1 = double-underline (default single),
* 2 = underline spaces (default don't)
*/
unsupported(SetUnderlineMode);
verify(0xDA); break;
case 0xDB: /* Set sheet feeder bin */
gobble(1); process1(SetBin, igetc());
verify(0xDB); break;
/* We ignore these codes, since they are followed by an 0x0C or an 0x8C */
case 0xDC: gobble(7); verify(0xDC); break; /* End-of-page codes */
case 0xDD: gobble(22); /* define columns */
unsupported(DefineColumn);
verify(0xDD);
case 0xDE: environment_status = 0; /* End indented paragraph */
gobble(2); process0(eIndent); verify(0xDE); break;
case 0xDF: /* invisible characters */
gobble_until(0xDF);
break;
case 0xE0: /* Doubly-indented paragraph */
leave_environment(0);
gobble(1); process1(DIndent, igetc()); verify(0xE0);
environment_status = eIndent; break;
case 0xE1: process0((unsigned char)igetc()); verify(0xE1); break;
/* IBM character */
case 0xE2: handle_note(); break; /* footnote or endnote */
case 0xE3: gobble(74+74); /* footnote attributes */
unsupported(SetFootnoteAttributes);
verify(0xE3);
break;
case 0xE4: gobble(2); /* old */ /* set footnote number */
/* bit 7 of second byte doesn't count, and the value
* is offset by one.
*/
c = igetc() & 0x3f;
c = (c << 7) + (igetc() & 0x7f);
process1(SetFnNum, 1 + c);
verify(0xE4);
break;
case 0xE5: /* paragraph numbering style */
gobble(7+7+7+7);
unsupported(SetParagraphNumberingStyle);
verify(0xE5);
break;
case 0xE6: /* paragraph number */
gobble(2+7);
unsupported(NumberedParagraph);
verify(0xE6);
break;
case 0xE9: /* begin marked text */
gobble(6);
unsupported(BeginMarkedText);
verify(0xE9);
break;
case 0xEA: /* end marked text */
unsupported(EndMarkedText);
gobble_until(0xEA);
break;
case 0xEB: /* define marked text */
gobble(30);
unsupported(DefineMarkedText);
verify(0xEB);
break;
case 0xEC: /* define index mark */
gobble(2);
unsupported(DefineIndexMark);
verify(0xEC);
break;
case 0xED: /* Table of authorities */
unsupported(DefineIndexMark);
gobble_until(0xED);
break;
case 0xEE: /* paragraph number def */
gobble(42);
unsupported(SetParagraphNumberingStyle);
verify(0xEE);
break;
case 0xEF: /* paragraph number */
gobble(16);
unsupported(NumberedParagraph);
verify(0xEF);
break;
case 0xF1: gobble(32 + 20); /* Tab settings */
handle_extended_tabs();
verify(0xF1);
break;
case 0xF3: /* column definition */
gobble(98);
unsupported(DefineColumn);
verify(0xF3);
break;
case 0xB2:
case 0xB3:
case 0xB4:
case 0xB5:
case 0xB6:
case 0xB7:
case 0xB8:
case 0xB9:
case 0xBA:
case 0xF0:
case 0xF2:
case 0xF4:
case 0xF5:
case 0xF6:
case 0xF7:
case 0xF8:
case 0xF9:
case 0xFA:
case 0xFB:
case 0xFC:
case 0xFD:
case 0xFE: unsupported(UnknownCode); break; /* undefined codes */
case 0xFF: handle_FF(); break;
default: process0(c); break;
}
return 1;
}
/* The other Useful Function: convert the whole input.
 *
 * Emits BEGIN, then calls process_token() repeatedly until it returns 0,
 * then emits END.
 */
void process_input(void)
{
    process0(BEGIN);
    for (;;) {
        if (!process_token()) break;
        do_nothing;
    }
    process0(END);
}
/************************************************************************/
/* The main program */
/************************************************************************/
/* Open file `s` in mode `m`, or complain loudly and give up.
 *
 * Returns the stream from fopen() on success.  On failure it writes
 *     Error: Cannot open <s> (<system error text>)
 * to stderr, followed by exactly one newline, and calls exit(1).  The
 * parenthesised text is included only when the errno left by fopen()
 * indexes a valid sys_errlist entry.
 */
FILE *efopen(const char *s, const char *m)
{
    FILE *fp = fopen(s, m);
    if (fp == NULL) {
        /* Capture errno now: the fprintf below is allowed to change it. */
        int open_errno = errno;

        fprintf(stderr, "Error: Cannot open %s", s);
        if (open_errno > 0 && open_errno < sys_nerr)
            fprintf(stderr, " (%s)", sys_errlist[open_errno]);
        fprintf(stderr, "\n");
        exit(1);
    }
    return fp;
}
#include "dopen.c" /* ickiness with file opening */
/* Our main program does very little, really.
 *
 * Leading arguments that begin with '-' are option clusters:
 *     s        set `silent'
 *     n<num>   set blipinterval to atoi(<num>); ends the cluster
 * Any other option letter, or anything but exactly two remaining
 * arguments, prints the usage line and exits with status 2.
 *
 * The two remaining arguments are the descriptor file, handed to dopen(),
 * and the input file, opened in binary mode.  The two Useful Functions
 * are then called in turn, and each file is closed after it has served
 * its purpose.
 */
int Cdecl main(int argc, char **argv)
{
    const char *opt;    /* cursor within the current option cluster */
    int scanning;       /* cleared once `n' has eaten the rest of it */

    for (--argc, ++argv; argc != 0 && **argv == '-'; --argc, ++argv) {
        opt = *argv;
        scanning = 1;
        while (scanning && *++opt != '\0') {
            if (*opt == 's') {
                silent = 1;
            } else if (*opt == 'n') {
                blipinterval = atoi(opt + 1);
                scanning = 0;
            } else {
                goto usage;
            }
        }
    }

    blipcount = blipinterval;
    if (argc != 2) goto usage;

    dopen(argv[0]);
    input = efopen(argv[1], "rb");
    do_descriptor_file();
    fclose(descriptor);
    process_input();
    fclose(input);
    return 0;

usage:
    fprintf(stderr, "usage: wp2x descriptor input > output\n");
    exit(2);
    return 2;   /* not reached: exit() does not return */
}